import requests
from lxml import etree
import time
import csv

# HTTP request headers; the User-Agent string presents the client as a
# desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/55.0.2883.87 Safari/537.36'
    ),
}

# Output file and CSV writer shared by the rest of the script.
# newline='' leaves line-ending handling to the csv module.
# NOTE(review): the '.cvs' extension looks like a typo for '.csv'; it is kept
# unchanged because the file name is part of the script's observable output.
fp = open('pdsecondhouse.cvs', mode='wt', newline='', encoding='utf-8')
writer = csv.writer(fp)

# Header row: room layout, area, district, construction time, unit price,
# total price.
writer.writerow(('房型', '面积', '区域', '建造时间', '单价', '总价'))

# Extract the fields of a single listing and write them to the CSV as one row:
def get_body(url_info):
    """Extract one listing's fields and append them to the CSV as a row.

    The row is written through the module-level ``writer`` in the column
    order room layout, area, district, construction time, unit price,
    total price.

    Args:
        url_info: A listing node supporting ``xpath()`` with relative
            queries (an lxml element in this script).

    Raises:
        IndexError: If an expected text node is missing, or the room/area
            text contains no ``|`` separator.
    """
    # The second span text carries "room | area". Split it once and strip
    # every part so padding around the separator never reaches the CSV
    # (previously only the area part was stripped after the split).
    room_infos = url_info.xpath('div/div/span/text()')[1]
    parts = [part.strip() for part in room_infos.split('|')]
    room, areas = parts[0], parts[1]

    # District: text of the second link inside the spans.
    addresss = url_info.xpath('div/div/span/a/text()')[1].strip()

    # Construction time: last '|'-separated field of the last text node in
    # the second info row.
    row2_texts = url_info.xpath('div/div/span[@class="info-col row2-text"]/text()')
    c_times = row2_texts[-1].split('|')[-1].strip()

    prices = url_info.xpath('div/div/span[@class="info-col price-item minor"]/text()')[0].strip()
    sum_prices = url_info.xpath('div/div/div/span[@class="total-price strong-num"]/text()')[0].strip()

    writer.writerow((room, areas, addresss, c_times, prices, sum_prices))

# Fetch one listing index page and process every listing found on it:
def get_urlinfos(url, timeout=10):
    """Download one index page and pass each listing on it to ``get_body``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` can block indefinitely on a stalled
            connection and freeze the whole crawl.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    res = requests.get(url, headers=headers, timeout=timeout)
    selector = etree.HTML(res.text)
    # Every <div class="info"> element is treated as one listing.
    for url_info in selector.xpath('//div[@class="info"]'):
        get_body(url_info)

# Index pages d0 .. d100 of the Pudong second-hand housing section.
# NOTE(review): range(101) starts at d0 — confirm that page exists and does
# not simply duplicate d1.
urls = ['http://sh.lianjia.com/ershoufang/pudong/d{}'.format(number) for number in range(101)]
start = time.time()
try:
    for url in urls:
        get_urlinfos(url)
        # Wait 3 seconds after each page, presumably to keep the request
        # rate low.
        time.sleep(3)
finally:
    # Close (and flush) the CSV even when a page fails part-way through,
    # so rows already scraped are not left in an unflushed buffer.
    fp.close()

end = time.time()
print("合计历时：" + str(end - start))
